## R-script for paper:
# Role of Governance in Japan's Renewable Energy Transition: Case study on Solar
## August 2018

# Packages ----------------------------------------------------------------

library("utf8")
library("zeligverse")
library("Amelia")
library("MASS")
library("vcd")
library("car")
library("pscl")
library("ggplot2")
library("boot")
library("data.table")
library("gmodels")
library("cmvnorm")
library("stats")
library("BaylorEdPsych")
library("dplyr")
library("glmnet")
library("mgcv")
library("scales")
library("gtools")
library("plyr")
library("DHARMa")
library("AER")
library("mfx")

# Multiple Imputation (Population) ------------------------------------------------------------
summary(Pop)
## Count number of missing values
missingvalues <- table(is.na(Pop))
missingvalues # TRUE = missing data, FALSE = present data -> 258 missing data cells
missingvalues[2]/missingvalues[1]*100 # Only 0.12% of data used for multiple imputation below is missing.
remove(missingvalues)
## Create logical bounds matrix to make all randomly selected numbers fit within the min and max of original dataset.
Pop_logical_bounds_matrix <- data.matrix(Pop_logical_bounds, rownames.force = NA)
## idvars excludes character columns that otherwise confuse amelia's multiple imputation process;
## idvars also excludes several variables (all 10MWplus) that amelia could not impute due to colinearity.
idvars = c("Code", "Municipality_Prefecture_Japanese", 
           "Municipality_Japanese", "Prefecture_English", 
           "Municipality_Prefecture_English", 
           "Regional_Power_Company", "Transmission_stations","Transmission_stations_per_capita", 
           "Substation_capacity", "Substation_capacity_per_capita",
           "Substations", "Substations_per_capita","Utility_customers",
           "Hokkaido_EPCO", "Tohoku_EPCO", "Hokuriku_EPCO", "Tokyo_EPCO",
           "Chubu_EPCO","Kansai_EPCO","Chugoku_EPCO","Shikoku_EPCO","Kyushu_EPCO","Okinawa_EPCO",
           "Hokkaido", "Aomori", "Akita","Iwate", "Yamagata", "Miyagi",
           "Fukushima","Tochigi","Ibaraki","Gunma","Saitama","Chiba",
           "Tokyo","Kanagawa","Niigata","Nagano","Yamanashi","Shizuoka",
           "Toyama","Ishikawa","Fukui","Gifu","Aichi","Shiga","Mie",
           "Kyoto","Nara","Wakayama","Osaka","Hyogo","Tokushima","Kagawa",
           "Kochi","Ehime","Okayama","Tottori","Shimane","Hiroshima","Yamaguchi",
           "Oita","Fukuoka","Saga","Nagasaki","Kumamoto","Miyazaki","Kagoshima","Okinawa")
a.out.pop <- amelia(x = Pop, m=5, idvars = idvars, max.resample = 10000, bounds = Pop_logical_bounds_matrix, seed = 1234) ## seed allows others to replicate this exact series of randomly imputed numbers
remove(idvars)
summary(a.out.pop) ##a.out is multiply imputed dataset

# Variable Standardization -----------------------------------------------------------------------
for(i in 1:5){
  df <- data.table(a.out.pop$imputations[[i]])
  df$SP_total <- (df$SP_10_49_kW+df$SP_50_499_kW+df$SP_500_1999kW+df$`SP_2000_kW+`)
  df$PV_output_0_100 <- rescale(df$PV_output_2018, to = c(0, 100), na.rm = TRUE, finite = TRUE)
  df$Windspeed_0_100 <- rescale(df$Windspeed_2018, to = c(0, 100), na.rm = TRUE, finite = TRUE)
  df$Forest_cover_0_100 <- rescale(df$PercentForest_cover, to = c(0,100), na.rm = TRUE, finite = TRUE)
  df$Area_inhabitable_0_100 <- rescale(df$Area_inhabitable_2010, to = c(0,100), na.rm = TRUE, finite = TRUE)
  df$Income_taxable_per_capita_0_100 <- rescale(df$Income_taxable_per_capita_2010, to = c(0, 100), na.rm = TRUE, finite = TRUE)
  df$Land_price_residential_0_100 <- rescale(df$Land_price_residential_2009, to = c(0,100), na.rm = TRUE, finite = TRUE)
  df$Land_price_residential_log <- log(df$Land_price_residential_2009)
  df$PercentChange_CO2_industry_0_100 <- rescale(df$PercentChange_CO2_industry_2000_03, to = c(0,100), na.rm = TRUE, finite = TRUE)
  df$Unemployment_0_100 <- rescale(df$Unemployment_2010, to = c(0,100), na.rm = TRUE, finite = TRUE)
  df$Population_0_100 <- rescale(df$Population_2010, to = c(0,100), na.rm = TRUE, finite = TRUE)
  df$Population_log <- log(df$Population_2010)
  df$Population_log20 <- log(df$Population_2010, 20)
  df$Population_log20_0_100 <- rescale(df$Population_log20, to = c(0,100), na.rm = TRUE, finite = TRUE)
  df$PercentEmployment_primary_0_100 <- rescale(df$PercentEmployment_primary_2010, to = c(0,100), na.rm = TRUE, finite = TRUE)
  df$PercentEmployment_secondary_0_100 <- rescale(df$PercentEmployment_secondary_2010, to = c(0,100), na.rm = TRUE, finite = TRUE)
  df$PercentEmployment_tertiary_0_100 <- rescale(df$PercentEmployment_tertiary_2010, to = c(0,100), na.rm = TRUE, finite = TRUE)
  df$PercentEmployment_EGHW_0_100 <- rescale(df$PercentEmployment_EGHW_2009, to = c(0,100), na.rm = TRUE, finite = TRUE)
  df$PercentChange_CO2_transport_2000_03_0_100 <- rescale(df$PercentChange_CO2_transport_2000_03, to = c(0,100), na.rm = TRUE, finite = TRUE)
  df$PercentChange_CO2_transport_2003_10_0_100 <- rescale(df$PercentChange_CO2_transport_2003_10, to = c(0,100), na.rm = TRUE, finite = TRUE)
  # Generate Zero & Count versions of dataset
  ## This allows us to create visualizations of both the count and zero parts of zero inflation models using Zelig.
  ## Can successfully produce correct odds ratio by modeling just count portion of dataset in zelig. Intercept is slightly larger in zelig model, but everything else checks out.
  df.count <- df
  df.count$SP_total[df.count$SP_total==0] <- NA
  df.count$SP_10_49_kW[df.count$SP_10_49_kW==0] <- NA
  df.count$SP_50_499_kW[df.count$SP_50_499_kW==0] <- NA
  df.count$SP_500_1999kW[df.count$SP_500_1999kW==0] <- NA
  df.count$`SP_2000_kW+`[df.count$`SP_2000_kW+`==0] <- NA
}


# Descriptive Statistics ------------------

summary(df)
describe(df) ## Warnings only have to do with case names; not an issue.

Pop$SP_total <- (Pop$SP_10_49_kW+Pop$SP_50_499_kW+Pop$SP_500_1999kW+Pop$`SP_2000_kW+`)
summary(Pop$SP_total) 
sd(Pop$SP_total)
summary(Pop$SP_10_49_kW)
sd(Pop$SP_10_49_kW)
summary(Pop$SP_50_499_kW)
sd(Pop$SP_50_499_kW)
summary(Pop$SP_500_1999kW)
sd(Pop$SP_500_1999kW)
summary(Pop$`SP_2000_kW+`)
sd(Pop$`SP_2000_kW+`)
summary(Pop$PV_output_2018)
sd(Pop$PV_output_2018)
summary(Pop$Windspeed_2018)
sd(Pop$Windspeed_2018)
summary(Pop$PercentForest_cover_2009)
sd(Pop$PercentForest_cover_2009)
Pop$Pop_density_2010 <- (Pop$Population_2010/Pop$Area_inhabitable_2010)
summary(Pop$Pop_density_2010)
sd(Pop$Pop_density_2010, na.rm = TRUE)
summary(Pop$Population_2010)
sd(Pop$Population_2010, na.rm = TRUE)
summary(Pop$Area_inhabitable_2010)
sd(Pop$Area_inhabitable_2010, na.rm = TRUE)
summary(Pop$Land_price_residential_2009)
sd(Pop$Land_price_residential_2009, na.rm = TRUE)
summary(Pop$Crime_Rate_2008)
sd(Pop$Crime_Rate_2008, na.rm = TRUE)
summary(Pop$ku_LDP_voteshare_2012)
sd(Pop$ku_LDP_voteshare_2012, na.rm = TRUE)
summary(Pop$Income_taxable_per_capita_2010)
sd(Pop$Income_taxable_per_capita_2010, na.rm = TRUE)
summary(Pop$Unemployment_2010)
sd(Pop$Unemployment_2010, na.rm = TRUE)
summary(Pop$Financial_Str_Index_2010)
sd(Pop$Financial_Str_Index_2010, na.rm = TRUE)
summary(Pop$PercentChange_CO2_industry_2000_03)
sd(Pop$PercentChange_CO2_industry_2000_03, na.rm = TRUE)


## Methodology ------------------------
## First, we confirm data overdispersion using dispersion tests.
dispersioncheck <- var(df$SP_total) / mean(df$SP_total)
dispersioncheck ## If dispersion is over or under 1 significantly, the negative binomial should be used instead of poisson.
remove(dispersioncheck)

dispersioncheck <- var(df$SP_10_49_kW) / mean(df$SP_10_49_kW)
dispersioncheck ## If dispersion is over or under 1 significantly, the negative binomial should be used instead of poisson.
remove(dispersioncheck)

dispersioncheck <- var(df$SP_50_499_kW) / mean(df$SP_50_499_kW)
dispersioncheck ## If dispersion is over or under 1 significantly, the negative binomial should be used instead of poisson.
remove(dispersioncheck)

dispersioncheck <- var(df$SP_500_1999kW) / mean(df$SP_500_1999kW)
dispersioncheck ## If dispersion is over or under 1 significantly, the negative binomial should be used instead of poisson.
remove(dispersioncheck)

## And for the zero-truncated version:
dispersioncheck <- var(df.count$SP_total,na.rm=TRUE)/mean(df.count$SP_total, na.rm=TRUE)
dispersioncheck ## If dispersion is over or under 1 significantly, the negative binomial should be used instead of poisson.
remove(dispersioncheck)

dispersioncheck <- var(df.count$SP_10_49_kW,na.rm=TRUE)/mean(df.count$SP_10_49_kW, na.rm=TRUE)
dispersioncheck ## If dispersion is over or under 1 significantly, the negative binomial should be used instead of poisson.
remove(dispersioncheck)


dispersioncheck <- var(df.count$SP_50_499_kW,na.rm=TRUE)/mean(df.count$SP_50_499_kW, na.rm=TRUE)
dispersioncheck ## If dispersion is over or under 1 significantly, the negative binomial should be used instead of poisson.
remove(dispersioncheck)

dispersioncheck <- var(df.count$SP_500_1999kW,na.rm=TRUE)/mean(df.count$SP_500_1999kW, na.rm=TRUE)
dispersioncheck ## If dispersion is over or under 1 significantly, the negative binomial should be used instead of poisson.
remove(dispersioncheck)


dispersioncheck <- var(df.count$`SP_2000_kW+`,na.rm=TRUE)/mean(df.count$`SP_2000_kW+`, na.rm=TRUE)
dispersioncheck ## If dispersion is over or under 1 significantly, the negative binomial should be used instead of poisson.
remove(dispersioncheck)

## This tells us that a negative binomial will better model the data than a poisson, which assumes that the data is equally dispersed.

## Ordinarily, we would model overdispersed data using negative binomial (nb) and zero-inflated negative binomial (zinb) models, and then compare which fits better using a vuong test.
## However, this study seeks to test and control for the effects of prefectures on the distribution of solar power plants.
## In order to compare the fit of nb and zinb models, the independent variables should be the same (otherwise, you are comparing apples and oranges).
## However, since zinb models split data into negative binomial count models and logit zero models, the number of variables you can test against in the logit model 
## is limited by the number of zeros in your dataset, since the sample size available determines the degrees of freedom.
## The summary() function below outlines the number of zeros (displayed as NA's) for each of our dependent variables of interest.

summary(df.count$SP_total) #102 zeros (can't model)
summary(df.count$SP_10_49_kW) #120 zeros (can't model)
summary(df.count$SP_50_499_kW) #540 zeros
summary(df.count$SP_500_1999kW) #479 zeros
summary(df.count$`SP_2000_kW+`) #1251 zeros

# Since prefectural varaibles cannot be included in the zero-models for all dependent variables, then we cannot
# A) compare effects across dependent variable models, or
# B) prove that a zinb fits better than a nb.

# When we plot histograms that show the frequency of each unique count value, we find evidence of extreme zero inflation.
hist(df$SP_total, nclass =max(count(df$SP_total)), xlab = "Solar Plants (all sizes)", main=NULL)
hist(df$SP_10_49_kW, nclass =max(count(df$SP_10_49_kW)), xlab = "Solar Plants 10-49 kW in installed capacity", main=NULL)
hist(df$SP_50_499_kW, nclass =max(count(df$SP_50_499_kW)), xlab = "Solar Plants 50-499 kW in installed capacity", main=NULL)
hist(df$SP_500_1999kW, nclass =max(count(df$SP_500_1999kW)), xlab = "Solar Plants 500-1999 kW in installed capacity", main=NULL)
hist(df$`SP_2000_kW+`, nclass =max(count(df$`SP_2000_kW`)), xlab = "Solar Plants 2000 kW+ in installed capacity", main=NULL)
# The functions below shows how many times more 0s each variable has than 1s.
(sum(df$SP_total==0) / sum(df$SP_total==1))
(sum(df$SP_10_49_kW==0) / sum(df$SP_10_49_kW==1)) 
(sum(df$SP_50_499_kW==0) / sum(df$SP_50_499_kW==1)) 
(sum(df$SP_500_1999kW==0) / sum(df$SP_500_1999kW==1))
(sum(df$SP_2000_kW==0) / sum(df$SP_2000_kW==1))
# Between 1.96~9.34 times as many zeros as one. There is significant zero inflation here.

# Instead, to compensate for zero-inflation without using a zero-inflated model,
# this study focuses only on the count (>=1) portion of the data.
# This examines cases that at least one solar plant in the designated category, and explains why some towns have more than others.

# This study adopts the Zelig package's negbin model to evaluate effects on the count data only. I use Zelig for its excellent integration with visualization tools.




# Solar (Total) Negative Binomial ------------------------------------------

summary(df$avtsunami)

nb.sp_total <- glm.nb(SP_total ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                        PercentEmployment_EGHW_2009 + Land_price_residential_2009 + 
                        Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                        Unemployment_2010 + Income_taxable_per_capita_2010 +
                        Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                        death11 + damaged11 + Fukushima_Exclusion_Zone +
                        Hokkaido +  Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                        Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                        Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                        Toyama + Ishikawa + Fukui + Gifu + Aichi + Shiga + Mie + 
                        Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                        Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                        Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                      data=df.count, link = log)
p.sp_total <- glm(SP_total ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                    PercentEmployment_EGHW_2009 + Land_price_residential_2009 + 
                    Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                    Unemployment_2010 + Income_taxable_per_capita_2010 +
                    Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                    death11 + damaged11 + Fukushima_Exclusion_Zone +
                    Hokkaido +  Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                    Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                    Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                    Toyama + Ishikawa + Fukui + Gifu + Aichi + Shiga + Mie + 
                    Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                    Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                    Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                      data=df.count, family = "poisson")
# Model Fit
# Count distribution Diagnostics
goodfit.sp_total <- goodfit(df.count$SP_total)
rootogram(goodfit.sp_total)
distplot(df.count$SP_total, type="nbinom")
remove(goodfit.sp_total)
# Overdispersion
deviance(nb.sp_total)/nb.sp_total$df.residual
# Goodness of Fit measures
with(p.sp_total, cbind(res.deviance = deviance, df = df.residual, 
                       p = pchisq(deviance, df.residual, lower.tail = FALSE)))
with(nb.sp_total, cbind(res.deviance = deviance, df = df.residual,
                        p = pchisq(deviance, df.residual, lower.tail = FALSE)))
pchisq(2 * (logLik(nb.sp_total) - logLik(p.sp_total)), df = 1, lower.tail = FALSE)
vuong(nb.sp_total, p.sp_total)
#Multicollinearity diagnostics

vif(nb.sp_total)
sqrt(vif(nb.sp_total)) > 2 # problem?
# Residuals
resDHARMa = simulateResiduals(nb.sp_total)
plot(resDHARMa)
remove(resDHARMa)
# Influence and leverage points
influencePlot(nb.sp_total)
# Due to outliers and non-normal distributions, I regenerate the model using robust standard errors below.

####
summary(nb.sp_total)
PseudoR2(nb.sp_total)
irr.sp_total <- negbinirr(formula = SP_total ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                            PercentEmployment_EGHW_2009 + Land_price_residential_2009 + 
                            Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                            Unemployment_2010 + Income_taxable_per_capita_2010 +
                            Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                            death11 + damaged11 + Fukushima_Exclusion_Zone +
                            Hokkaido +  Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                            Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                            Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                            Toyama + Ishikawa + Fukui + Gifu + Aichi + Shiga + Mie + 
                            Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                            Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                            Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                          data = df.count, robust=TRUE)
# Calculates incident rate ratio, with robust standard errors.
mfx.sp_total <- negbinmfx(formula = SP_total ~PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                            PercentEmployment_EGHW_2009 + Land_price_residential_2009 + 
                            Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                            Unemployment_2010 + Income_taxable_per_capita_2010 +
                            Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                            death11 + damaged11 + Fukushima_Exclusion_Zone +
                            Hokkaido +  Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                            Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                            Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                            Toyama + Ishikawa + Fukui + Gifu + Aichi + Shiga + Mie + 
                            Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                            Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                            Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
          data = df.count, robust=TRUE)
# Calculates marginal effects at the means, with robust standard errors.

# Zelig Simulations for Solar (total) --------------------------------------

z.sp_total <- zelig(formula = SP_total ~  PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                      PercentEmployment_EGHW_2009 + Land_price_residential_2009 +
                      Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                      Unemployment_2010 + Income_taxable_per_capita_2010 +
                      Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                      damaged11 + Fukushima_Exclusion_Zone +
                      Hokkaido +  Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                      Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                      Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                      Toyama + Ishikawa + Fukui + Gifu + Aichi + Shiga + Mie + 
                      Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                      Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                      Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                    model = "negbin", data=df.count)

summary(z.sp_total)
odds.z.sp_total <- exp(coef(z.sp_total))
odds.z.sp_total  
irr.sp_total # zelig and mfx odds ratios/irr are almost identical. Can plot with zelig.
remove(odds.z.sp_total)
x.sp_total <- setx(z.sp_total)
s.sp_total <- sim(z.sp_total, x = x.sp_total)
summary(s.sp_total)

summary(df.count$Unemployment_2010)
x.weak <- setx(z.sp_total, Unemployment_2010 = 0)
x.strong <- setx(z.sp_total, Unemployment_2010 = 25)
s.test <- sim(z.sp_total, x = x.strong, x1 = x.weak)
plot.s.sp_total <- setx(z.sp_total, Unemployment_2010 = 0:25)
sim.plot.s.sp_total <- sim(plot.s.sp_total)
remove(x.weak, x.strong, s.test, plot.s.sp_total)
plot(sim.plot.s.sp_total, ylab="Expected Solar Plants", xlab="Unemployment Rate (%) 2010")

summary(df.count$Miyagi)
x.weak <- setx(z.sp_total, Miyagi = 0)
x.strong <- setx(z.sp_total, Miyagi = 1)
s.test <- sim(z.sp_total, x = x.strong, x1 = x.weak)
plot.s.sp_total <- setx(z.sp_total, Miyagi = 0:1)
sim.plot.s.sp_total <- sim(plot.s.sp_total)
remove(x.weak, x.strong, s.test, plot.s.sp_total)
plot(sim.plot.s.sp_total, ylab="Expected Solar Plants", xlab="Miyagi Prefecture")

remove(x.sp_total, z.sp_total, s.sp_total, sim.plot.s.sp_total)



# Solar (10-49kW) Negative Binomial -------------------------------------------
nb.SP_10_49_kW <- glm.nb(SP_10_49_kW ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                           PercentEmployment_EGHW_2009 + Land_price_residential_2009 + 
                           Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                           Unemployment_2010 + Income_taxable_per_capita_2010 +
                           Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                           death11 + damaged11 + Fukushima_Exclusion_Zone +
                           Hokkaido +  Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                           Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                           Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                           Toyama + Ishikawa + Fukui + Gifu + Aichi + Shiga + Mie + 
                           Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                           Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                           Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                      data=df.count, link = log)
p.SP_10_49_kW <- glm(SP_10_49_kW ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                       PercentEmployment_EGHW_2009 + Land_price_residential_2009 + 
                       Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                       Unemployment_2010 + Income_taxable_per_capita_2010 +
                       Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                       death11 + damaged11 + Fukushima_Exclusion_Zone +
                       Hokkaido +  Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                       Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                       Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                       Toyama + Ishikawa + Fukui + Gifu + Aichi + Shiga + Mie + 
                       Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                       Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                       Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                  data=df.count, family = "poisson")
# Model Fit
# Count distribution Diagnostics
goodfit.SP_10_49_kW <- goodfit(df.count$SP_10_49_kW)
rootogram(goodfit.SP_10_49_kW)
distplot(df.count$SP_10_49_kW, type="nbinom")
remove(goodfit.SP_10_49_kW)
# Overdispersion
deviance(nb.SP_10_49_kW)/nb.SP_10_49_kW$df.residual
# Goodness of Fit measures
with(p.SP_10_49_kW, cbind(res.deviance = deviance, df = df.residual, 
                       p = pchisq(deviance, df.residual, lower.tail = FALSE)))
with(nb.SP_10_49_kW, cbind(res.deviance = deviance, df = df.residual,
                        p = pchisq(deviance, df.residual, lower.tail = FALSE)))
pchisq(2 * (logLik(nb.SP_10_49_kW) - logLik(p.SP_10_49_kW)), df = 1, lower.tail = FALSE)
vuong(nb.SP_10_49_kW, p.SP_10_49_kW)
#Multicollinearity diagnostics
vif(nb.SP_10_49_kW)
sqrt(vif(nb.SP_10_49_kW)) > 2 # problem?
# Residuals
resDHARMa = simulateResiduals(nb.SP_10_49_kW)
plot(resDHARMa)
remove(resDHARMa)

# Influence and leverage points
influencePlot(nb.SP_10_49_kW)
## Given outliers and non-normal distributions, I use robust standard errors below.


####
summary(nb.SP_10_49_kW)
PseudoR2(nb.SP_10_49_kW)
irr.SP_10_49_kW <- negbinirr(formula = SP_10_49_kW ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                               PercentEmployment_EGHW_2009 + Land_price_residential_2009 + 
                               Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                               Unemployment_2010 + Income_taxable_per_capita_2010 +
                               Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                               death11 + damaged11 + Fukushima_Exclusion_Zone +
                               Hokkaido +  Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                               Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                               Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                               Toyama + Ishikawa + Fukui + Gifu + Aichi + Shiga + Mie + 
                               Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                               Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                               Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                          data = df.count, robust=TRUE)
# Calculates incident rate ratio, with robust standard errors.
mfx.SP_10_49_kW <- negbinmfx(formula = SP_10_49_kW ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                               PercentEmployment_EGHW_2009 + Land_price_residential_2009 + 
                               Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                               Unemployment_2010 + Income_taxable_per_capita_2010 +
                               Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                               death11 + damaged11 + Fukushima_Exclusion_Zone +
                               Hokkaido +  Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                               Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                               Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                               Toyama + Ishikawa + Fukui + Gifu + Aichi + Shiga + Mie + 
                               Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                               Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                               Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                          data = df.count, robust=TRUE)
# Calculates marginal effects at the means, with robust standard errors.
# Solar (50-499kW) Negative Binomial ---------------------------
nb.SP_50_499_kW <- glm.nb(SP_50_499_kW ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                            PercentEmployment_EGHW_2009 + Land_price_residential_2009 + 
                            Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                            Unemployment_2010 + Income_taxable_per_capita_2010 +
                            Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                            death11 + damaged11 + Fukushima_Exclusion_Zone +
                            Hokkaido +  Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                            Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                            Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                            Toyama + Ishikawa + Fukui + Gifu + Aichi + Shiga + Mie + 
                            Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                            Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                            Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                         data=df.count, link = log)
p.SP_50_499_kW <- glm(SP_50_499_kW ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                        PercentEmployment_EGHW_2009 + Land_price_residential_2009 + 
                        Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                        Unemployment_2010 + Income_taxable_per_capita_2010 +
                        Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                        death11 + damaged11 + Fukushima_Exclusion_Zone +
                        Hokkaido +  Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                        Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                        Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                        Toyama + Ishikawa + Fukui + Gifu + Aichi + Shiga + Mie + 
                        Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                        Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                        Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                     data=df.count, family = "poisson")
# Model Fit
# Count distribution Diagnostics
goodfit.SP_50_499_kW <- goodfit(df.count$SP_50_499_kW)
rootogram(goodfit.SP_50_499_kW)
distplot(df.count$SP_50_499_kW, type="nbinom")
remove(goodfit.SP_50_499_kW)
# Overdispersion
deviance(nb.SP_50_499_kW)/nb.SP_50_499_kW$df.residual
# Goodness of Fit measures
with(p.SP_50_499_kW, cbind(res.deviance = deviance, df = df.residual, 
                          p = pchisq(deviance, df.residual, lower.tail = FALSE)))
with(nb.SP_50_499_kW, cbind(res.deviance = deviance, df = df.residual,
                           p = pchisq(deviance, df.residual, lower.tail = FALSE)))
pchisq(2 * (logLik(nb.SP_50_499_kW) - logLik(p.SP_50_499_kW)), df = 1, lower.tail = FALSE)
vuong(nb.SP_50_499_kW, p.SP_50_499_kW)
#Multicollinearity diagnostics
vif(nb.SP_50_499_kW)
sqrt(vif(nb.SP_50_499_kW)) > 2 # problem?
# Residuals
resDHARMa = simulateResiduals(nb.SP_50_499_kW)
plot(resDHARMa) # significant deviation in residuals
remove(resDHARMa)

# Influence and leverage points
influencePlot(nb.SP_50_499_kW)

# Due to outliers and non-normal distributions, I use robust standard errors below.

####
summary(nb.SP_50_499_kW)
PseudoR2(nb.SP_50_499_kW)
irr.SP_50_499_kW <- negbinirr(formula = SP_50_499_kW ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                                PercentEmployment_EGHW_2009 + Land_price_residential_2009 + 
                                Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                                Unemployment_2010 + Income_taxable_per_capita_2010 +
                                Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                                death11 + damaged11 + Fukushima_Exclusion_Zone +
                                Hokkaido +  Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                                Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                                Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                                Toyama + Ishikawa + Fukui + Gifu + Aichi + Shiga + Mie + 
                                Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                                Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                                Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                             data = df.count, robust=TRUE)
# Calculates incident rate ratio, with robust standard errors.
mfx.SP_50_499_kW <- negbinmfx(formula = SP_50_499_kW ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                                PercentEmployment_EGHW_2009 + Land_price_residential_2009 + 
                                Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                                Unemployment_2010 + Income_taxable_per_capita_2010 +
                                Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                                death11 + damaged11 + Fukushima_Exclusion_Zone +
                                Hokkaido +  Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                                Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                                Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                                Toyama + Ishikawa + Fukui + Gifu + Aichi + Shiga + Mie + 
                                Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                                Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                                Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                             data = df.count, robust=TRUE)
# Calculates marginal effects at the means, with robust standard errors.

# Solar (500-1999kW) Negative Binomial ---------------------------
nb.SP_500_1999kW <- glm.nb(SP_500_1999kW ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                             PercentEmployment_EGHW_2009 + Land_price_residential_2009 + 
                             Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                             Unemployment_2010 + Income_taxable_per_capita_2010 +
                             Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                             death11 + damaged11 + Fukushima_Exclusion_Zone +
                             Hokkaido +  Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                             Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                             Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                             Toyama + Ishikawa + Fukui + Gifu + Aichi + Shiga + Mie + 
                             Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                             Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                             Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                          data=df.count, link = log)
p.SP_500_1999kW <- glm(SP_500_1999kW ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                         PercentEmployment_EGHW_2009 + Land_price_residential_2009 + 
                         Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                         Unemployment_2010 + Income_taxable_per_capita_2010 +
                         Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                         death11 + damaged11 + Fukushima_Exclusion_Zone +
                         Hokkaido +  Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                         Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                         Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                         Toyama + Ishikawa + Fukui + Gifu + Aichi + Shiga + Mie + 
                         Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                         Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                         Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                      data=df.count, family = "poisson")
# Model Fit
# Count distribution Diagnostics
goodfit.SP_500_1999kW <- goodfit(df.count$SP_500_1999kW)
rootogram(goodfit.SP_500_1999kW)
distplot(df.count$SP_500_1999kW, type="nbinom")
remove(goodfit.SP_500_1999kW)
# Overdispersion
deviance(nb.SP_500_1999kW)/nb.SP_500_1999kW$df.residual
# Goodness of Fit measures
with(p.SP_500_1999kW, cbind(res.deviance = deviance, df = df.residual, 
                           p = pchisq(deviance, df.residual, lower.tail = FALSE)))
with(nb.SP_500_1999kW, cbind(res.deviance = deviance, df = df.residual,
                            p = pchisq(deviance, df.residual, lower.tail = FALSE)))
pchisq(2 * (logLik(nb.SP_500_1999kW) - logLik(p.SP_500_1999kW)), df = 1, lower.tail = FALSE)
vuong(nb.SP_500_1999kW, p.SP_500_1999kW)
#Multicollinearity diagnostics
vif(nb.SP_500_1999kW)
sqrt(vif(nb.SP_500_1999kW)) > 2 # problem?    Hokkaido has a score of 6.1, which is under 10.
# Residuals
resDHARMa = simulateResiduals(nb.SP_500_1999kW)
plot(resDHARMa) # significant deviation in residuals
remove(resDHARMa)
# Influence and leverage points
influencePlot(nb.SP_500_1999kW)
# Due to outliers and non-normal distribution, I use robust standard errors.

####
summary(nb.SP_500_1999kW)
PseudoR2(nb.SP_500_1999kW)
irr.SP_500_1999kW <- negbinirr(formula = SP_500_1999kW ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                                 PercentEmployment_EGHW_2009 + Land_price_residential_2009 + 
                                 Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                                 Unemployment_2010 + Income_taxable_per_capita_2010 +
                                 Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                                 death11 + damaged11 + Fukushima_Exclusion_Zone +
                                 Hokkaido +  Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                                 Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                                 Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                                 Toyama + Ishikawa + Fukui + Gifu + Aichi + Shiga + Mie + 
                                 Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                                 Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                                 Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                              data = df.count, robust=TRUE)
# Calculates incident rate ratio, with robust standard errors.
mfx.SP_500_1999kW <- negbinmfx(formula = SP_500_1999kW ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                                 PercentEmployment_EGHW_2009 + Land_price_residential_2009 + 
                                 Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                                 Unemployment_2010 + Income_taxable_per_capita_2010 +
                                 Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                                 death11 + damaged11 + Fukushima_Exclusion_Zone +
                                 Hokkaido +  Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                                 Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                                 Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                                 Toyama + Ishikawa + Fukui + Gifu + Aichi + Shiga + Mie + 
                                 Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                                 Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                                 Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                              data = df.count, robust=TRUE)
# Calculates marginal effects at the means, with robust standard errors.

# Solar (2000 kW +) Negative Binomial ---------------------------

nb.SP_2000_kW <- glm.nb(`SP_2000_kW+` ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                             PercentEmployment_EGHW_2009 + Land_price_residential_2009 +
                             Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                             Unemployment_2010 + Income_taxable_per_capita_2010 +
                             Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                             death11 + damaged11 + Fukushima_Exclusion_Zone +
                             Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                             Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                             Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                             Toyama + Ishikawa + Gifu + Aichi + Shiga + Mie + 
                             Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                             Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                             Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                           data=df.count, link = log) 
# Fukui removed because no effect on dependent variable
# Hokkaido removed due to multicolinearity (VIF)
# Financial capacity index and Voter turnout not included lest model fit algorithm not converge
p.SP_2000_kW <- glm(`SP_2000_kW+` ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                      PercentEmployment_EGHW_2009 + Land_price_residential_2009 +
                      Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                      Unemployment_2010 + Income_taxable_per_capita_2010 +
                      Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                      death11 + damaged11 + Fukushima_Exclusion_Zone +
                      Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                      Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                      Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                      Toyama + Ishikawa + Gifu + Aichi + Shiga + Mie + 
                      Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                      Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                      Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                       data=df.count, family = "poisson") 
# Fukui removed because no effect on dependent variable
# Hokkaido removed due to multicolinearity (VIF)

# Model Fit
# Count distribution Diagnostics
goodfit.SP_2000_kW <- goodfit(df.count$`SP_2000_kW+`)
rootogram(goodfit.SP_2000_kW)
distplot(df.count$`SP_2000_kW+`, type="nbinom")
remove(goodfit.SP_2000_kW)
# Overdispersion
deviance(nb.SP_2000_kW)/nb.SP_2000_kW$df.residual
# Goodness of Fit measures
with(p.SP_2000_kW, cbind(res.deviance = deviance, df = df.residual, 
                            p = pchisq(deviance, df.residual, lower.tail = FALSE)))
with(nb.SP_2000_kW, cbind(res.deviance = deviance, df = df.residual,
                             p = pchisq(deviance, df.residual, lower.tail = FALSE)))
pchisq(2 * (logLik(nb.SP_2000_kW) - logLik(p.SP_2000_kW)), df = 1, lower.tail = FALSE)
vuong(nb.SP_2000_kW, p.SP_2000_kW)
#Multicollinearity diagnostics
vif(nb.SP_2000_kW)
sqrt(vif(nb.SP_2000_kW)) > 2 # problem?
# Residuals
resDHARMa = simulateResiduals(nb.SP_2000_kW)
plot(resDHARMa) # significant deviation in residuals
remove(resDHARMa)

# Influence and leverage points
influencePlot(nb.SP_2000_kW)
# Due to outliers and non-normal distributions, I use robust standard errors.

####
summary(nb.SP_2000_kW)
PseudoR2(nb.SP_2000_kW)
irr.SP_2000_kW <- negbinirr(formula = `SP_2000_kW+` ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                              PercentEmployment_EGHW_2009 + Land_price_residential_2009 +
                              Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                              Unemployment_2010 + Income_taxable_per_capita_2010 +
                              Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                              death11 + damaged11 + Fukushima_Exclusion_Zone +
                              Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                              Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                              Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                              Toyama + Ishikawa + Gifu + Aichi + Shiga + Mie + 
                              Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                              Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                              Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                               data = df.count, robust=TRUE)
# Calculates incident rate ratio, with robust standard errors.
mfx.SP_2000_kW <- negbinmfx(formula = `SP_2000_kW+` ~ PV_output_2018 + Area_inhabitable_2010 + Population_2010 +
                              PercentEmployment_EGHW_2009 + Land_price_residential_2009 +
                              Crime_Rate_2008 + ku_LDP_voteshare_2012 + 
                              Unemployment_2010 + Income_taxable_per_capita_2010 +
                              Ratio_Revs_Exp_2010 + PercentChange_CO2_industry_2000_03 +
                              death11 + damaged11 + Fukushima_Exclusion_Zone +
                              Aomori +  Akita + Iwate +  Yamagata +  Miyagi + 
                              Fukushima + Tochigi + Ibaraki + Gunma + Saitama + Chiba + 
                              Kanagawa + Niigata + Nagano + Yamanashi + Shizuoka + 
                              Toyama + Ishikawa + Gifu + Aichi + Shiga + Mie + 
                              Kyoto + Nara + Wakayama + Osaka + Hyogo + Tokushima + Kagawa + 
                              Kochi + Ehime + Okayama + Tottori + Shimane + Hiroshima + Yamaguchi + 
                              Oita + Fukuoka + Saga + Nagasaki + Kumamoto + Miyazaki + Kagoshima,
                               data = df.count, robust=TRUE)
# Calculates marginal effects at the means, with robust standard errors.

write.csv(mfx.sp_total$mfxest, file="/home/tmfraser65/Downloads/sptotal")
write.csv(mfx.SP_10_49_kW$mfxest, file="/home/tmfraser65/Downloads/sp10-49")
write.csv(mfx.SP_50_499_kW$mfxest, file="/home/tmfraser65/Downloads/sp50-499")
write.csv(mfx.SP_500_1999kW$mfxest, file="/home/tmfraser65/Downloads/sp500-1999")
write.csv(mfx.SP_2000_kW$mfxest, file="/home/tmfraser65/Downloads/sp2000")

# Because R^2 measures do not change when using or not using robust standard errors,
# I calculate model fit criteria using original models in cases where criteria is not reported by the mfx package.


# Model Diagnostics Report for Chart Building -------------------------------------------
PseudoR2(nb.sp_total)
str(mfx.sp_total)
round(cbind(mean(vif(nb.sp_total)), max(vif(nb.sp_total))), digits = 2)
with(nb.sp_total, cbind(res.deviance = deviance, df = df.residual,
                          p = pchisq(deviance, df.residual, lower.tail = FALSE)))
vuong(nb.sp_total, p.sp_total)

PseudoR2(nb.SP_10_49_kW)
str(mfx.SP_10_49_kW)
round(cbind(mean(vif(nb.SP_10_49_kW)), max(vif(nb.SP_10_49_kW))), digits = 2)
with(nb.SP_10_49_kW, cbind(res.deviance = deviance, df = df.residual,
                        p = pchisq(deviance, df.residual, lower.tail = FALSE)))
vuong(nb.SP_10_49_kW, p.SP_10_49_kW)

PseudoR2(nb.SP_50_499_kW)
str(mfx.SP_50_499_kW)
round(cbind(mean(vif(nb.SP_50_499_kW)), max(vif(nb.SP_50_499_kW))),digits =2)
with(nb.SP_50_499_kW, cbind(res.deviance = deviance, df = df.residual,
                        p = pchisq(deviance, df.residual, lower.tail = FALSE)))
vuong(nb.SP_50_499_kW, p.SP_50_499_kW)

PseudoR2(nb.SP_500_1999kW)
str(mfx.SP_500_1999kW)
round(cbind(mean(vif(nb.SP_500_1999kW)), max(vif(nb.SP_500_1999kW))),digits=2)
with(nb.SP_500_1999kW, cbind(res.deviance = deviance, df = df.residual,
                        p = pchisq(deviance, df.residual, lower.tail = FALSE)))
vuong(nb.SP_500_1999kW, p.SP_500_1999kW)

PseudoR2(nb.SP_2000_kW)
str(nb.SP_2000_kW)
round(cbind(mean(vif(nb.SP_2000_kW)), max(vif(nb.SP_2000_kW))),digits=2)
with(nb.SP_2000_kW, cbind(res.deviance = deviance, df = df.residual,
                          p = pchisq(deviance, df.residual, lower.tail = FALSE)))
vuong(nb.SP_2000_kW, p.SP_2000_kW)
